import os
import sys
from math import floor
from Bio import SeqIO

# Split the per-read FASTA lists ("seqlist_READ1.fa", "seqlist_READ2.fa") into
# n chunks each, and write one Slurm batch script per chunk plus a "script.sh"
# that submits all of them with sbatch.
#
# Usage: <this script> <target> <n>
#   target  "genome", or the name of a filter FASTA (e.g. "fantomcat")
#   n       number of chunks to split each read list into (>= 1)

# NOTE(review): these two values are not referenced anywhere in this script;
# they are kept unchanged -- confirm nothing external relies on them before
# removing.
query_length = 33
score_threshold = 0.9

# Read labels; the input for each label is "seqlist_<label>.fa".
READS = ("READ1", "READ2")

# Batch script written for every chunk.  Filled in with a mapping, see
# render_job_script().
JOB_TEMPLATE = """\
#!/bin/bash
#SBATCH -e ./%(root)s.stderr
#SBATCH -o ./%(root)s.stdout
#SBATCH --mem=%(memory)s
bwa mem -O 0 -E 1 -A 1 -B 1 -T 10 -k 10 -c 100000000 -a -Y %(fasta)s %(chunk)s | samtools view -F %(flag)d -u | bamToPsl - stdout | pslCheck stdin -pass=stdout -quiet 2> %(target)s.%(readno)s_%(index)d.out | sort -k 14 | pslRecalcMatch stdin %(twobit)s %(chunk)s stdout | sort -k 10 > %(target)s.%(readno)s.%(start)d-%(end)d.psl
"""


def alignment_settings(target):
    """Return (fasta, twobit, memory, flag) for the given alignment target.

    fasta is handed to "bwa mem", twobit to "pslRecalcMatch", memory to
    "#SBATCH --mem" and flag to "samtools view -F" in the generated scripts.
    """
    if target == "genome":
        assembly = "hg38"
        # flag 4: unmapped
        return ("%s.fa" % assembly, "%s.2bit" % assembly, "16G", 4)
    fasta = "/osc-fs_home/mdehoon/Data/CASPARs/Filters/%s.fa" % target
    memory = "32G" if target == "fantomcat" else "16G"
    # flag 20: unmapped, or mapped to the opposite strand
    return (fasta, "%s.2bit" % target, memory, 20)


def chunk_bounds(total, n):
    """Split range(total) into n consecutive (start, end) half-open ranges.

    The first total % n ranges receive one extra element, so the sizes differ
    by at most one and the last range always ends at total.  If total < n the
    trailing ranges are empty (start == end).

    Raises ValueError if n is smaller than 1.
    """
    if n < 1:
        raise ValueError("number of chunks must be at least 1, got %d" % n)
    size, extra = divmod(total, n)
    bounds = []
    end = 0
    for index in range(n):
        start = end
        end = start + size + (1 if index < extra else 0)
        bounds.append((start, end))
    return bounds


def render_job_script(target, readno, index, start, end,
                      fasta, twobit, memory, flag):
    """Return (script_name, script_text) for one chunk of one read list.

    The script aligns "seqlist_<readno>_<start>_<end>.fa" and writes
    "<target>.<readno>.<start>-<end>.psl".
    """
    root = "script_%s_%s_%d" % (target, readno, index)
    text = JOB_TEMPLATE % {
        "root": root,
        "memory": memory,
        "fasta": fasta,
        "twobit": twobit,
        "flag": flag,
        "target": target,
        "readno": readno,
        "index": index,
        "start": start,
        "end": end,
        "chunk": "seqlist_%s_%d_%d.fa" % (readno, start, end),
    }
    return "%s.sh" % root, text


def write_jobs_for_read(readno, target, n, settings, submit):
    """Write the chunk FASTA files and job scripts for one read list.

    settings is the tuple returned by alignment_settings(); submit is the
    open "script.sh" handle that receives one "sbatch" line per job.
    """
    fasta, twobit, memory, flag = settings
    source = "seqlist_%s.fa" % readno
    print("Reading", source)
    # First pass only counts the records so the chunk sizes can be computed
    # without holding the whole file in memory.
    total = sum(1 for _ in SeqIO.parse(source, 'fasta'))
    # Second pass streams the records into the chunk files in order.
    records = SeqIO.parse(source, 'fasta')
    for index, (start, end) in enumerate(chunk_bounds(total, n)):
        chunk_name = "seqlist_%s_%d_%d.fa" % (readno, start, end)
        with open(chunk_name, 'w') as handle:
            for _ in range(end - start):
                try:
                    record = next(records)
                except StopIteration:
                    # Fewer records than were counted in the first pass.
                    raise RuntimeError(
                        "%s ended before record %d was read"
                        % (source, total)) from None
                handle.write(format(record, "fasta"))
        script_name, text = render_job_script(
            target, readno, index, start, end, fasta, twobit, memory, flag)
        with open(script_name, 'w') as handle:
            handle.write(text)
        submit.write('sbatch %s\n' % script_name)


def main(argv=None):
    """Parse the command line and generate all chunk files and scripts.

    argv defaults to sys.argv.  Exits with a message (non-zero status) when
    the target or the chunk count is missing or the count is not a positive
    integer.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = argv[0] if argv else "script"
        sys.exit("usage: %s <target> <number-of-chunks>" % program)
    target = argv[1]
    try:
        n = int(argv[2])
    except ValueError:
        sys.exit("number of chunks must be an integer, got %r" % argv[2])
    if n < 1:
        sys.exit("number of chunks must be at least 1, got %d" % n)

    settings = alignment_settings(target)
    with open('script.sh', 'w') as submit:
        for readno in READS:
            write_jobs_for_read(readno, target, n, settings, submit)
    # The generated scripts use #SBATCH directives and are submitted with
    # sbatch, i.e. Slurm.
    print("Scripts generated; run script.sh to submit them with sbatch (Slurm)")


if __name__ == "__main__":
    main()
